library(reticulate)

Data extraction

import pandas as pd
import numpy as np
import os

# read data: county-level presidential returns; keep only rows with a FIPS code
countypres = pd.read_csv('~/data/countypres_2000-2016.csv', dtype={'FIPS': 'Int64'}).dropna(subset=['FIPS'])

countypres = countypres[countypres.year.isin([2008, 2012, 2016])].copy()
# vote share = candidate's votes / total votes cast in the county
countypres['proportion'] = countypres.candidatevotes/countypres.totalvotes
# BUG FIX: the original code aligned the democrat and republican subsets by row
# position, which silently mis-assigns shares if any county-year is missing one
# party's row. Align explicitly on the (year, FIPS) key instead.
dem = (countypres.loc[countypres.party == 'democrat',
                      ['year', 'state_po', 'county', 'FIPS', 'proportion']]
       .rename(columns={'proportion': 'democrat_proportion'}))
rep = (countypres.loc[countypres.party == 'republican',
                      ['year', 'FIPS', 'proportion']]
       .rename(columns={'proportion': 'republican_proportion'}))
votes = dem.merge(rep, on=['year', 'FIPS'], how='left')
votes['FIPS'] = [str(i).zfill(5) for i in votes.FIPS] # make county FIPS consist of 5 digits
votes = votes[['year', 'state_po', 'county', 'FIPS', 'democrat_proportion', 'republican_proportion']]

# County unemployment/income table; the real header sits on spreadsheet row 8
unemployment = pd.read_excel('~/data/Unemployment.xls', header=7).dropna(subset=['FIPStxt'])
unemployment
##       FIPStxt  ... Med_HH_Income_Percent_of_State_Total_2018
## 0           0  ...                                       NaN
## 1        1000  ...                                100.000000
## 2        1001  ...                                118.959123
## 3        1003  ...                                115.450773
## 4        1005  ...                                 68.928049
## ...       ...  ...                                       ...
## 3270    72145  ...                                       NaN
## 3271    72147  ...                                       NaN
## 3272    72149  ...                                       NaN
## 3273    72151  ...                                       NaN
## 3274    72153  ...                                       NaN
## 
## [3275 rows x 88 columns]
# Only county rows are named "County, ST"; state/country rows lack the comma
unemployment = unemployment[unemployment.area_name.astype(str).str.contains(',', regex=False)].copy()
area_parts = unemployment.area_name.astype(str).str.split(', ')
unemployment['state_po'] = area_parts.str[1]
unemployment['county'] = area_parts.str[0]
# zero-pad county FIPS to 5 digits
unemployment['FIPS'] = unemployment.FIPStxt.astype(str).str.zfill(5)

# Reshape to long format: replicate the county table once per election year
election_years = [2008, 2012, 2016]
unemployment_rate = pd.concat([unemployment] * len(election_years))
unemployment_rate['year'] = np.repeat(election_years, len(unemployment))
# Stack each year's rate column in the same order as the replicated rows
unemployment_rate['unemployment_rate'] = pd.concat(
    [unemployment['Unemployment_rate_%d' % y] for y in election_years])
unemployment_rate = unemployment_rate[['state_po', 'county', 'FIPS', 'year', 'unemployment_rate']]

# Outer join keeps counties that appear in only one source; the overlapping
# county/state_po columns get _x (votes) and _y (unemployment) suffixes.
extract = pd.merge(votes, unemployment_rate, how='outer', on=['FIPS', 'year'])
# Vectorized coalesce replaces the original row-wise apply(lambda ...):
# take the votes-side value and fall back to the unemployment-side value
# where it is missing. Same result, no per-row Python overhead.
extract.insert(1, 'county', extract['county_x'].fillna(extract['county_y']))
extract.insert(2, 'state', extract['state_po_x'].fillna(extract['state_po_y']))

data_clean = extract.drop(['state_po_x', 'state_po_y', 'county_x', 'county_y'], axis=1)
# Force a plain numpy array so reticulate transfers the column cleanly to R
data_clean['state'] = np.array(data_clean['state'])
# Flatten the list column reticulate hands back into a plain character vector
py$data_clean$state <- unlist(py$data_clean$state)
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(spData)
library(gridExtra)
library(readxl)
library(maps)
library(ggplot2)
library(spDataLarge)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x purrr::map()     masks maps::map()
# load shape file (TIGER/Line 2019 county boundaries)
shape <- st_read('~/desktop/data/tl_2019_us_county-1')
## Reading layer `tl_2019_us_county' from data source `/Users/liuhao/Desktop/data/tl_2019_us_county-1' using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## geographic CRS: NAD83
# plot data prepare: join the cleaned Python table onto county geometries
# (Census GEOID is the same 5-digit code as county FIPS)
data <- merge(shape, py$data_clean, by.x = 'GEOID', by.y = 'FIPS')
# NOTE(review): `counties` is not referenced later in this view — confirm it is needed
counties <- st_as_sf(maps::map('county', plot = FALSE, fill = TRUE))
# map data: state polygons plus centroid X/Y coordinates for placing labels
states <- st_as_sf(maps::map('state', plot = FALSE, fill = TRUE))
states <- cbind(states, st_coordinates(st_centroid(states)))
states <- mutate(states, ID = str_to_title(ID))
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data

Texas Democrat Voting and Unemployment Map

# Democrat vote share by Texas county, one panel per election year
p1 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'TX'), aes(fill = democrat_proportion)) +
  geom_label(data = states %>% filter(ID != 'Texas'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  # BUG FIX: labs(fill = ...) was overridden by the scale's name argument, so
  # the intended legend title never appeared; set it on the scale directly
  scale_fill_distiller(name = 'democrat vote proportion', palette = 'PuBuGn') +
  coord_sf(xlim = c(-107, -93), ylim = c(25, 37), expand = FALSE) +
  # BUG FIX: axis labels were swapped — x is longitude, y is latitude
  labs(x = 'Long', y = 'Lat')

# Unemployment rate for the same counties and years
p2 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'TX'), aes(fill = unemployment_rate)) +
  geom_label(data = states %>% filter(ID != 'Texas'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  scale_fill_distiller(name = 'unemployment rate', palette = 'Spectral') +
  coord_sf(xlim = c(-107, -93), ylim = c(25, 37), expand = FALSE) +
  labs(x = 'Long', y = 'Lat')

grid.arrange(p1, p2, nrow = 2, ncol = 1,
             top = 'Texas Voting Map and Unemployment Rate')

California Democrat Voting and Unemployment Map

# Democrat vote share by California county, one panel per election year
p1 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'CA'), aes(fill = democrat_proportion)) +
  geom_label(data = states %>% filter(ID != 'California'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  # BUG FIX: labs(fill = ...) was overridden by the scale's name argument, so
  # the intended legend title never appeared; set it on the scale directly
  scale_fill_distiller(name = 'democrat vote proportion', palette = 'PuBuGn') +
  coord_sf(xlim = c(-125, -113), ylim = c(29, 43), expand = FALSE) +
  # BUG FIX: axis labels were swapped — x is longitude, y is latitude
  labs(x = 'Long', y = 'Lat')

# Unemployment rate for the same counties and years
p2 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'CA'), aes(fill = unemployment_rate)) +
  geom_label(data = states %>% filter(ID != 'California'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  scale_fill_distiller(name = 'unemployment rate', palette = 'Spectral') +
  coord_sf(xlim = c(-125, -113), ylim = c(29, 43), expand = FALSE) +
  labs(x = 'Long', y = 'Lat')

grid.arrange(p1, p2, nrow = 2, ncol = 1,
             top = 'California Voting Map and Unemployment Rate')

Pennsylvania Democrat Voting and Unemployment Map

# Democrat vote share by Pennsylvania county, one panel per election year
p1 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'PA'), aes(fill = democrat_proportion)) +
  geom_label(data = states %>% filter(ID != 'Pennsylvania'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  # BUG FIX: labs(fill = ...) was overridden by the scale's name argument, so
  # the intended legend title never appeared; set it on the scale directly
  scale_fill_distiller(name = 'democrat vote proportion', palette = 'PuBuGn') +
  coord_sf(xlim = c(-81, -74), ylim = c(39, 44), expand = FALSE) +
  # BUG FIX: axis labels were swapped — x is longitude, y is latitude
  labs(x = 'Long', y = 'Lat')

# Unemployment rate for the same counties and years
p2 <- ggplot() +
  geom_sf(data = us_states, fill = NA) +
  geom_sf(data = data %>% filter(state == 'PA'), aes(fill = unemployment_rate)) +
  geom_label(data = states %>% filter(ID != 'Pennsylvania'),
             aes(X, Y, label = ID), size = 3) +
  facet_wrap(~year, ncol = 3) +
  scale_fill_distiller(name = 'unemployment rate', palette = 'Spectral') +
  coord_sf(xlim = c(-81, -74), ylim = c(39, 44), expand = FALSE) +
  labs(x = 'Long', y = 'Lat')

grid.arrange(p1, p2, nrow = 2, ncol = 1,
             top = 'Pennsylvania Voting Map and Unemployment Rate')